4 Genomic data

4.1 Read duplicates

all_data %>%
    select(dataset,Extraction,duplicates,Taxon) %>%
    unique() %>%
    group_by(Taxon,Extraction) %>%
    summarise(value = sprintf("%.1f±%.1f", mean(duplicates), sd(duplicates))) %>%
    pivot_wider(names_from = Extraction, values_from = value) %>%
    tt(caption = "Mean and standard deviation of fraction of duplicated reads")
tinytable_vjawmjput2gu5663t9ge
Mean and standard deviation of fraction of duplicated reads
Taxon ZYMO DREX EHEX
Amphibian 0.3±0.2 0.2±0.2 0.2±0.2
Reptile 0.5±0.4 0.3±0.3 0.4±0.4
Mammal 0.4±0.4 0.2±0.1 0.2±0.2
Bird 0.8±0.3 0.9±0.1 0.6±0.4
Control 1.0±0.0 0.9±0.0 1.0±0.0
all_data %>%
    select(dataset,Extraction,duplicates,Taxon, Species) %>%
    mutate(duplicates=duplicates*100) %>%
    unique() %>%
    ggplot(aes(x=Extraction,y=duplicates, color=Species, group=Extraction)) + 
        scale_y_reverse() +
        geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") + 
        geom_jitter() + 
        scale_color_manual(values=vertebrate_colors) +
        facet_grid(. ~ Taxon, scales = "free") +
        theme_minimal() +
        labs(y="Duplication rate (%)",x="Extraction method")

all_data %>%
    select(dataset,Sample,Species,Extraction,duplicates,Taxon) %>%
    filter(Taxon != "Control") %>%
    lmerTest::lmer(duplicates ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
    broom.mixed::tidy() %>%
    tt()
tinytable_wb7w83luwyo29s21ryfe
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 0.48640693 0.07257915 6.701744 14.33359 8.937456e-06
fixed NA ExtractionDREX -0.09279626 0.03900562 -2.379048 160.72377 1.853073e-02
fixed NA ExtractionEHEX -0.14618630 0.03918499 -3.730671 160.73104 2.644423e-04
ran_pars Sample sd__(Intercept) 0.00000000 NA NA NA NA
ran_pars Species sd__(Intercept) 0.23148121 NA NA NA NA
ran_pars Residual sd__Observation 0.21005171 NA NA NA NA

4.2 Depth of coverage

all_data %>%
    select(dataset,Extraction,coverage_depth,Taxon) %>%
    unique() %>%
    group_by(Taxon,Extraction) %>%
    summarise(value = sprintf("%.1f±%.1f", mean(coverage_depth), sd(coverage_depth))) %>%
    pivot_wider(names_from = Extraction, values_from = value) %>%
    tt(caption = "Mean and standard deviation of fraction of duplicated reads")
tinytable_t9n0hqdbgz2f4svgir2f
Mean and standard deviation of fraction of duplicated reads
Taxon ZYMO DREX EHEX
Amphibian 0.0±0.0 0.0±0.0 0.0±0.0
Reptile 0.2±0.3 0.1±0.1 0.1±0.1
Mammal 0.7±1.2 0.3±0.4 0.5±1.0
Bird 0.4±0.8 0.6±0.5 0.8±0.8
Control 0.0±0.0 0.0±0.0 0.0±0.0
all_data %>%
    select(dataset,Extraction,coverage_depth,Taxon, Species) %>%
    unique() %>%
    ggplot(aes(x=Extraction,y=coverage_depth, color=Species, group=Extraction)) + 
        geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") + 
        geom_jitter() + 
        scale_color_manual(values=vertebrate_colors) +
        facet_grid(. ~ Taxon, scales = "free") +
        theme_minimal() +
        labs(y="Depth of coverage",x="Extraction method")

all_data %>%
    select(dataset,Sample,Species,Extraction,coverage_depth,Taxon) %>%
    unique() %>%
    filter(Taxon != "Control") %>%
    lmerTest::lmer(coverage_depth ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
    broom.mixed::tidy() %>%
    tt()
tinytable_q8fwrihq1ti9ow3qrn8q
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 0.329125000 0.1331063 2.47264850 20.58494 0.02222872
fixed NA ExtractionDREX -0.089791667 0.1144640 -0.78445305 48.00000 0.43662873
fixed NA ExtractionEHEX -0.002791667 0.1144640 -0.02438903 48.00000 0.98064341
ran_pars Sample sd__(Intercept) 0.408634577 NA NA NA NA
ran_pars Species sd__(Intercept) 0.224731208 NA NA NA NA
ran_pars Residual sd__Observation 0.396515070 NA NA NA NA

4.3 Breadth of coverage

all_data %>%
    select(dataset,Extraction,coverage_breadth,Taxon) %>%
    unique() %>%
    group_by(Taxon,Extraction) %>%
    summarise(value = sprintf("%.1f±%.1f", mean(coverage_breadth), sd(coverage_breadth))) %>%
    pivot_wider(names_from = Extraction, values_from = value) %>%
    tt(caption = "Mean and standard deviation of depth of host genome coverage")
tinytable_546ba6ddnf0y4c3a6hb2
Mean and standard deviation of depth of host genome coverage
Taxon ZYMO DREX EHEX
Amphibian 0.0±0.0 0.0±0.0 0.0±0.0
Reptile 4.9±7.5 3.0±5.4 2.9±3.7
Mammal 5.7±5.9 10.2±16.4 15.1±26.4
Bird 0.6±0.5 3.2±4.4 8.9±13.9
Control 0.0±0.0 0.0±0.0 0.0±0.0
all_data %>%
    select(dataset,Extraction,coverage_breadth,Taxon,Species) %>%
    unique() %>%
    ggplot(aes(x=Extraction,y=coverage_breadth, color=Species, group=Extraction)) + 
        geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") + 
        geom_jitter() + 
        scale_color_manual(values=vertebrate_colors) +
        facet_grid(. ~ Taxon, scales = "free") +
        theme_minimal() +
        labs(y="Breadth of coverage (%)",x="Extraction method")

all_data %>%
    select(dataset,Extraction,Sample,Species,coverage_breadth,Taxon) %>%
    unique() %>%
    filter(Taxon != "Control") %>%
    lmerTest::lmer(coverage_breadth ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
    broom.mixed::tidy() %>%
    tt()
tinytable_x9l6hyp23pbqdlzcg4zb
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 2.799167 2.252826 1.2425133 20.82558 0.22785670
fixed NA ExtractionDREX 1.301250 1.956426 0.6651158 48.00000 0.50915975
fixed NA ExtractionEHEX 3.918750 1.956426 2.0030144 48.00000 0.05084021
ran_pars Sample sd__(Intercept) 7.118462 NA NA NA NA
ran_pars Species sd__(Intercept) 3.549767 NA NA NA NA
ran_pars Residual sd__Observation 6.777259 NA NA NA NA

4.4 Breadth vs. coverage

all_data_log <- all_data %>%
    mutate(coverage_breadth_log=log(coverage_breadth)) %>%
    mutate(coverage_depth_log=log(coverage_depth)) 

lm_eq <- lm(coverage_breadth_log ~ coverage_depth_log, data = all_data_log %>% filter(coverage_depth_log != -Inf ,coverage_breadth_log != -Inf))
coef <- coef(lm_eq)
all_data_log$coverage_breadth_log_pred <- coef[1] + coef[2] * all_data_log$coverage_depth_log


all_data_log %>%
    select(dataset,Extraction,coverage_depth_log,coverage_breadth_log,coverage_breadth_log_pred,Taxon,Species,Sample) %>%
    unique() %>%
    ggplot(aes(x=coverage_depth_log,y=coverage_breadth_log)) + 
        geom_point(aes(color=Species, shape=Extraction),size=3, alpha=0.9) + 
        geom_segment(aes(x = coverage_depth_log, y = coverage_breadth_log, xend = coverage_depth_log, yend = coverage_breadth_log_pred, color=Species), alpha=0.9)+
        geom_smooth(method = lm, se = FALSE, color="#666666") +
        scale_color_manual(values=vertebrate_colors) +
        theme_minimal() +
        labs(y="Breadth of coverage (%)",x="Depth of coverage")